
һ
import org.apache.spark.sql.SparkSession
val spark=SparkSession.builder().getOrCreate()
import spark.implicits._
val df = spark.read.json("file:///usr/local/spark/employee.json")


(1)df.show()
(2)df.distinct().show()
(3)df.drop("id").show()
(4)df.filter(df("age") > 30 ).show()
(5)df.groupBy("name").count().show()
(6)df.sort(df("name").asc).show()
(7)df.take(3) scala> df.head(3)
(8)df.select(df("name").as("username")).show()
(9)df.agg("age"->"avg")
(10)df.agg("age"->"min")




import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder
import org.apache.spark.sql.Encoder
import org.apache.spark.sql.SparkSession
import spark.implicits._
object RDDtoDF {
    def main(args: Array[String]) {
case class Employee(id:Long,name: String, age: Long)
val spark = SparkSession
.builder
.master("local[*]")
.appName("Simple Application")
.getOrCreate()
val employeeDF = spark.sparkContext.textFile("file:///usr/local/spark/employee.txt").map(_.split(",")).map(attributes => Employee(attributes(0).trim.toInt,attributes(1), attributes(2).trim.toInt)).toDF()
employeeDF.createOrReplaceTempView("employee")
val employeeRDD = spark.sql("select id,name,age from employee")
employeeRDD.map(t => "id:"+t(0)+","+"name:"+t(1)+","+"age:"+t(2)).show()
    }
}





import org.apache.spark.sql.SparkSession
import java.util.Properties
import org.apache.spark.sql.types._
import org.apache.spark.sql.Row
object TestMySQL {
def main(args: Array[String]) {
val spark = SparkSession
.builder
.master("local[*]")
.appName("Simple Application")
.getOrCreate()
val employeeRDD = spark.sparkContext.parallelize(Array("3 Mary F 26","4 Tom M 23")).map(_.split(" "))
val schema = StructType(List(StructField("id", IntegerType, true),StructField("name", StringType, true),StructField("gender", StringType, true),StructField("age", IntegerType, true)))
val rowRDD = employeeRDD.map(p => Row(p(0).toInt,p(1).trim, p(2).trim,p(3).toInt))
val employeeDF = spark.createDataFrame(rowRDD, schema)
val prop = new Properties()
prop.put("user", "root") 
prop.put("password", "hadoop") 
prop.put("driver","com.mysql.jdbc.Driver")
employeeDF.write.mode("append").jdbc("jdbc:mysql://localhost:3306/sparktest", sparktest.employee", prop)
val jdbcDF = spark.read.format("jdbc").option("url", "jdbc:mysql://localhost:3306/sparktest").option("driver","com.mysql.jdbc.Driver").option("dbtable","employee").option("user","root").option("password", "hadoop").load()
jdbcDF.agg("age" -> "max", "age" -> "sum")
    }
}
